home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2008 February / PCWFEB08.iso / Software / Freeware / Miro 1.0 / Miro_Installer.exe / Miro_Downloader.exe / flashscraper.pyc (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2007-11-12  |  9.4 KB  |  252 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.5)
  3.  
  4. import re
  5. import httpclient
  6. import urlparse
  7. import cgi
  8. from xml.dom import minidom
  9. from urllib import unquote_plus
  10. from util import checkU, returnsUnicode
  11.  
  12. def tryScrapingURL(url, callback):
  13.     checkU(url)
  14.     scrape = _getScrapeFunctionFor(url)
  15.     if scrape is not None:
  16.         None((scrape, url), (lambda x: _actualURLCallback(url, callback, x)))
  17.     else:
  18.         callback(url)
  19.  
  20.  
  21. def _actualURLCallback(url, callback, newURL):
  22.     if newURL:
  23.         checkU(newURL)
  24.     
  25.     callback(newURL, contentType = u'video/x-flv')
  26.  
  27.  
  28. def _getScrapeFunctionFor(url):
  29.     checkU(url)
  30.     for scrapeInfo in scraperInfoMap:
  31.         if re.compile(scrapeInfo['pattern']).match(url) is not None:
  32.             return scrapeInfo['func']
  33.             continue
  34.     
  35.  
  36.  
  37. def _scrapeYouTubeURL(url, callback):
  38.     checkU(url)
  39.     httpclient.grabHeaders((url,), ((lambda x: _youTubeCallback(x, callback)),), (lambda x: _youTubeErrback(x, callback)))
  40.  
  41.  
  42. def _youTubeCallback(info, callback):
  43.     url = info['redirected-url']
  44.     
  45.     try:
  46.         components = urlparse.urlsplit(url)
  47.         params = cgi.parse_qs(components[3])
  48.         videoID = params['video_id'][0]
  49.         t = params['t'][0]
  50.         url = u'http://youtube.com/get_video.php?video_id=%s&t=%s' % (videoID, t)
  51.         callback(url)
  52.     except:
  53.         print 'DTV: WARNING, unable to scrape You Tube Video URL: %s' % url
  54.         callback(None)
  55.  
  56.  
  57.  
  58. def _youTubeErrback(err, callback):
  59.     print 'DTV: WARNING, network error scraping You Tube Video URL'
  60.     callback(None)
  61.  
  62.  
  63. def _scrapeGoogleVideoURL(url, callback):
  64.     
  65.     try:
  66.         components = urlparse.urlsplit(url)
  67.         params = cgi.parse_qs(components[3])
  68.         docId = params['docId'][0]
  69.         url = u'http://video.google.com/videofile/%s.flv?docid=%s&itag=5' % (docId, docId)
  70.         callback(url)
  71.     except:
  72.         print 'DTV: WARNING, unable to scrape Google Video URL: %s' % url
  73.         callback(None)
  74.  
  75.  
  76.  
  77. def _scrapeLuLuVideoURL(url, callback):
  78.     
  79.     try:
  80.         components = urlparse.urlsplit(url)
  81.         params = cgi.parse_qs(components[3])
  82.         url = unquote_plus(params['file'][0]).decode('ascii', 'replace')
  83.         callback(url)
  84.     except:
  85.         print 'DTV: WARNING, unable to scrape LuLu.tv Video URL: %s' % url
  86.         callback(None)
  87.  
  88.  
  89.  
  90. def _scrapeVMixVideoURL(url, callback):
  91.     
  92.     try:
  93.         components = urlparse.urlsplit(url)
  94.         params = cgi.parse_qs(components[3])
  95.         t = params['type'][0]
  96.         ID = params['id'][0]
  97.         l = params['l'][0]
  98.         url = u'http://sdstage01.vmix.com/videos.php?type=%s&id=%s&l=%s' % (t, ID, l)
  99.         httpclient.grabURL((url,), ((lambda x: _scrapeVMixCallback(x, callback)),), (lambda x: _scrapeVMixErrback(x, callback)))
  100.     except:
  101.         print 'DTV: WARNING, unable to scrape VMix Video URL: %s' % url
  102.         callback(None)
  103.  
  104.  
  105.  
  106. def _scrapeVMixCallback(info, callback):
  107.     
  108.     try:
  109.         doc = minidom.parseString(info['body'])
  110.         url = doc.getElementsByTagName('file').item(0).firstChild.data.decode('ascii', 'replace')
  111.         callback(url)
  112.     except:
  113.         print 'DTV: WARNING, unsable to scrape XML for VMix Video URL %s' % info['redirected-url']
  114.         callback(None)
  115.  
  116.  
  117.  
  118. def _scrapeVMixErrback(err, callback):
  119.     print 'DTV: WARNING, network error scraping VMix Video URL'
  120.     callback(None)
  121.  
  122.  
  123. def _scrapeDailyMotionVideoURL(url, callback):
  124.     httpclient.grabHeaders((url,), ((lambda x: _scrapeDailyMotionCallback(x, callback)),), (lambda x: _scrapeDailyMotionErrback(x, callback)))
  125.  
  126.  
  127. def _scrapeDailyMotionCallback(info, callback):
  128.     url = info['redirected-url']
  129.     
  130.     try:
  131.         components = urlparse.urlsplit(url)
  132.         params = cgi.parse_qs(components[3])
  133.         url = unquote_plus(params['url'][0]).decode('ascii', 'replace')
  134.         callback(url)
  135.     except:
  136.         print 'DTV: WARNING, unable to scrape Daily Motion URL: %s' % url
  137.         callback(None)
  138.  
  139.  
  140.  
  141. def _scrapeDailyMotionErrback(info, callback):
  142.     print 'DTV: WARNING, network error scraping Daily Motion Video URL'
  143.     callback(None)
  144.  
  145.  
  146. def _scrapeVSocialVideoURL(url, callback):
  147.     
  148.     try:
  149.         components = urlparse.urlsplit(url)
  150.         params = cgi.parse_qs(components[3])
  151.         v = params['v'][0]
  152.         url = u'http://static.vsocial.com/varmedia/vsocial/flv/%s_out.flv' % v
  153.         callback(url)
  154.     except:
  155.         print 'DTV: WARNING, unable to scrape VSocial URL: %s' % url
  156.         callback(None)
  157.  
  158.  
  159.  
  160. def _scrapeVeohTVVideoURL(url, callback):
  161.     
  162.     try:
  163.         components = urlparse.urlsplit(url)
  164.         params = cgi.parse_qs(components[3])
  165.         t = params['type'][0]
  166.         permalinkId = params['permalinkId'][0]
  167.         url = u'http://www.veoh.com/movieList.html?type=%s&permalinkId=%s&numResults=45' % (t, permalinkId)
  168.         httpclient.grabURL((url,), ((lambda x: _scrapeVeohTVCallback(x, callback)),), (lambda x: _scrapeVeohTVErrback(x, callback)))
  169.     except:
  170.         print 'DTV: WARNING, unable to scrape Veoh URL: %s' % url
  171.         callback(None)
  172.  
  173.  
  174.  
  175. def _scrapeVeohTVCallback(info, callback):
  176.     url = info['redirected-url']
  177.     
  178.     try:
  179.         params = cgi.parse_qs(info['body'])
  180.         fileHash = params['previewHashLow'][0]
  181.         if fileHash[-1] == ',':
  182.             fileHash = fileHash[:-1]
  183.         
  184.         url = u'http://ll-previews.veoh.com/previews/get.jsp?fileHash=%s' % fileHash
  185.         callback(url)
  186.     except:
  187.         print 'DTV: WARNING, unable to scrape Veoh URL data: %s' % url
  188.         callback(None)
  189.  
  190.  
  191.  
  192. def _scrapeVeohTVErrback(err, callback):
  193.     print 'DTV: WARNING, network error scraping Veoh TV Video URL'
  194.     callback(None)
  195.  
  196.  
  197. def _scrapeBreakVideoURL(url, callback):
  198.     httpclient.grabHeaders((url,), ((lambda x: _scrapeBreakCallback(x, callback)),), (lambda x: _scrapeBreakErrback(x, callback)))
  199.  
  200.  
  201. def _scrapeBreakCallback(info, callback):
  202.     url = info['redirected-url']
  203.     
  204.     try:
  205.         components = urlparse.urlsplit(url)
  206.         params = cgi.parse_qs(components[3])
  207.         url = unquote_plus(params['sVidLoc'][0]).decode('ascii', 'replace')
  208.         callback(url)
  209.     except:
  210.         print 'DTV: WARNING, unable to scrape Break URL: %s' % url
  211.         callback(None)
  212.  
  213.  
  214.  
  215. def _scrapeBreakErrback(info, callback):
  216.     print 'DTV: WARNING, network error scraping Break Video URL'
  217.     callback(None)
  218.  
  219.  
  220. def _scrapeGreenPeaceVideoURL(url, callback):
  221.     print 'DTV: Warning, unable to scrape Green peace Video URL %s' % url
  222.     print callback(None)
  223.  
  224. scraperInfoMap = [
  225.     {
  226.         'pattern': 'http://([^/]+\\.)?youtube.com/(?!get_video\\.php)',
  227.         'func': _scrapeYouTubeURL },
  228.     {
  229.         'pattern': 'http://video.google.com/googleplayer.swf',
  230.         'func': _scrapeGoogleVideoURL },
  231.     {
  232.         'pattern': 'http://([^/]+\\.)?lulu.tv/wp-content/flash_play/flvplayer',
  233.         'func': _scrapeLuLuVideoURL },
  234.     {
  235.         'pattern': 'http://([^/]+\\.)?vmix.com/flash/super_player.swf',
  236.         'func': _scrapeVMixVideoURL },
  237.     {
  238.         'pattern': 'http://([^/]+\\.)?dailymotion.com/swf',
  239.         'func': _scrapeDailyMotionVideoURL },
  240.     {
  241.         'pattern': 'http://([^/]+\\.)?vsocial.com/flash/vp.swf',
  242.         'func': _scrapeVSocialVideoURL },
  243.     {
  244.         'pattern': 'http://([^/]+\\.)?veoh.com/multiplayer.swf',
  245.         'func': _scrapeVeohTVVideoURL },
  246.     {
  247.         'pattern': 'http://([^/]+\\.)?greenpeaceweb.org/GreenpeaceTV1Col.swf',
  248.         'func': _scrapeGreenPeaceVideoURL },
  249.     {
  250.         'pattern': 'http://([^/]+\\.)?break.com/',
  251.         'func': _scrapeBreakVideoURL }]
  252.